## "Where are the missing dead" replication
##
## Reproduce: Average Death per Accident (Traffic vs. Others)
##
## 			  - Figure 4
## 
## Input:     event_data.dta

## Guoer Liu
## 7/30/2022 


setwd("~/Dropbox/Missing dead/replication/")


library(haven)
library(DataCombine) # InsertRow()
library(scales)
library(extrafont) ## change fonts in plot


### Function ###

paraPlot <- function(collapMat, 
					matrix.x.lab = NULL, matrix.title = NULL,
					l.mar = 3.5,
					hline = NULL,
					x.axis.vec = NULL,
					y.lab = TRUE, y.lab.names = NULL,
					y.min = NULL, y.max = NULL,
					legend = TRUE, leg.pos = NULL){
	# Args:
	#	collapMat:      n x 2 matrix (column 1: control; column 2: treat)
	#   matrix.x.lab:   a string of figure x axis label
	#	matrix.title:	a string of figure title
	#	hline:			a scalar indicate the position of vertical line
	#	x.axis.vec:		a length n string vector that shows x axis
	#   y.lab:			logical parameter to turn off y label
	#   y.lab.names:    a string of y axis lable
	#   y.min:			a scalar indicates the minimum value on y axis
	#   y.max:			a scalar indicates the maximum value on y axis
	#   legend:			logical parameter to turn off legend
	#	leg.pos:		a string indicates legend position
	pch.cex <- 1.6
	ctl.v <- collapMat[, 1]
	tr.v <- collapMat[, 2]
	#y.max <- max(x)
	#y.min <- min(x)
	x.len <- nrow(collapMat)

	par(mar= c(4, l.mar, 1, 1))
	plot(NA, xlim = c(0.5, x.len), 
	         ylim = c(y.min, y.max), 
	         type = "n", 
	         xlab = matrix.x.lab,
	         main = matrix.title, 
	         ylab = "", axes = F, cex.lab = 1.3)
	grid(nx = NULL, ny = NULL,
     	 lty = 2,      # Grid line type
     	 col = "gray", # Grid line color
     	 lwd = 0.6)
	abline(v = hline, col = alpha("orangered", 0.8), lwd = 1.5)
	lines(ctl.v, lty = 2, lwd = 1.5)
	points(ctl.v, cex = pch.cex, pch = 1, col = "black")
	lines(tr.v, lty = 1, lwd = 1.5)
	points(tr.v, cex = pch.cex, pch = 19, col = "black", bg = "black")
	axis(1, at = seq(1, x.len, 1), label = x.axis.vec, 
		 cex.axis = 1, las = 1)
	if (y.lab == TRUE){
		axis(2, at = seq(ceiling(y.min), floor(y.max), 1), 
			label = seq(ceiling(y.min), floor(y.max), 1),
			cex.axis = 1, las = 1)
		title(ylab = y.lab.names, line = 2, cex.lab = 1)
	}
	if (legend == TRUE){
		legend(leg.pos, title = NULL, 
			   legend = c("Treated", "Control"), 
			   col = "black", 
			   lty = c(1, 2), cex = 0.8,
			   pch = c(19, 1),
			   lwd = c(1, 1))
	}
}


mydata <- read_dta("event_data.dta")
mydata <- as.data.frame(mydata)

mydata <- subset(mydata, year > 1999 & year < 2006 & death <30)

treat_group <- ifelse((mydata$province_CN == 8 | 
					   mydata$province_CN == 16 | 
					   mydata$province_CN == 24 | 
					   mydata$province_CN == 31) & 
					   is.na(mydata$province_CN)==F, 1, 0)

traf_mean_pytr <- tapply(mydata$death[mydata$category_manu==5], 
						list(mydata$year[mydata$category_manu==5], 
							 treat_group[mydata$category_manu==5]), 
						     function(x) mean(x, na.rm = T))

traf_mean_pytr <- as.data.frame(traf_mean_pytr)
traf_mean_pytr <- InsertRow(traf_mean_pytr, rep(NA, 2), 1)
rownames(traf_mean_pytr) <- seq(2000, 2005, 1)

othr_mean_pytr <- tapply(mydata$death[mydata$category_manu!=5], 
						list(mydata$year[mydata$category_manu!=5], 
							 treat_group[mydata$category_manu!=5]), 
							 function(x) mean(x, na.rm = T))

#pdf("f4_avgDeath.pdf", family = "Times New Roman",
#	 width = 8, height = 3.8)
par(mfrow = c(1, 2))
paraPlot(traf_mean_pytr, "Time", "Traffic",
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = TRUE, y.lab.names = "Average Death per Acc.",
		y.min = 1.9, y.max = 10.1,
		legend = FALSE, leg.pos = "bottomright")
paraPlot(othr_mean_pytr, "Time", "Others", l.mar = 1.5,
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = FALSE,
		y.min = 1.9, y.max = 11.1,
		legend = TRUE, leg.pos = "topright")
#dev.off()
















